{
 "cells": [
  {
   "cell_type": "code",
   "execution_count": 1,
   "metadata": {},
   "outputs": [],
   "source": [
    "import sys \n",
    "sys.path.append('../scripts/')\n",
    "from dynamic_programming import *"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 2,
   "metadata": {},
   "outputs": [],
   "source": [
    "class BeliefDynamicProgramming(DynamicProgramming): \n",
    "    def __init__(self, widths, goal, puddles, time_interval, sampling_num, puddle_coef=100.0, \\\n",
    "                 lowerleft=np.array([-4, -4]).T, upperright=np.array([4, 4]).T, dev_borders=[0.1,0.2,0.4,0.8]): \n",
    "        super().__init__(widths, goal, puddles, time_interval, sampling_num, puddle_coef, lowerleft, upperright)\n",
    "        \n",
    "        self.index_nums = np.array([*self.index_nums, len(dev_borders) + 1])\n",
    "        nx, ny, nt, nh = self.index_nums\n",
    "        self.indexes = list(itertools.product(range(nx), range(ny), range(nt), range(nh)))\n",
    "        \n",
    "        self.value_function, self.final_state_flags =  self.init_belief_value_function()\n",
    "        self.policy = np.zeros(np.r_[self.index_nums,2]) \n",
    "        \n",
    "        self.dev_borders = dev_borders ###beliefdp3（calc_motion_sigma_transition_probsまで） #以下追加\n",
    "        self.dev_borders_side = [dev_borders[0]/10, *dev_borders, dev_borders[-1]*10] #dev_bordersに両側を加える\n",
    "        self.motion_sigma_transition_probs = self.init_motion_sigma_transition_probs()\n",
    "        \n",
    "    def init_motion_sigma_transition_probs(self):\n",
    "        probs = {}\n",
    "        for a in self.actions:\n",
    "            for i in range(len(self.dev_borders)+1):\n",
    "                probs[(i, a)] = self.calc_motion_sigma_transition_probs(self.dev_borders_side[i], self.dev_borders_side[i+1], a)\n",
    "                \n",
    "        return probs\n",
    "            \n",
    "    def cov_to_index(self, cov):\n",
    "        sigma = np.power(np.linalg.det(cov), 1.0/6)\n",
    "        for i, e in enumerate(self.dev_borders):\n",
    "            if sigma < e: return i\n",
    "            \n",
    "        return len(self.dev_borders)\n",
    "        \n",
    "    def calc_motion_sigma_transition_probs(self, min_sigma, max_sigma, action, sampling_num=100):\n",
    "        nu, omega = action\n",
    "        if abs(omega) < 1e-5: omega = 1e-5\n",
    "\n",
    "        F = matF(nu, omega, self.time_interval, 0.0) #ロボットの向きは関係ないので0[deg]で固定で\n",
    "        M = matM(nu, omega, self.time_interval, {\"nn\":0.19, \"no\":0.001, \"on\":0.13, \"oo\":0.2})#移動の誤差モデル（カルマンフィルタのものをコピペ）\n",
    "        A = matA(nu, omega, self.time_interval, 0.0)\n",
    "        \n",
    "        ans = {}\n",
    "        for sigma in np.linspace(min_sigma, max_sigma*0.999, sampling_num): #遷移前のσを作る（区間内に一様分布していると仮定）\n",
    "            index_after = self.cov_to_index(sigma*sigma*F.dot(F.T) + A.dot(M).dot(A.T)) #遷移後のσのインデックス\n",
    "            ans[index_after] = 1 if index_after not in ans else ans[index_after] + 1 #単にカウントしてるだけ（辞書の初期化もあるのでややこしい）\n",
    "                \n",
    "        for e in ans:\n",
    "            ans[e] /= sampling_num #頻度を確率に\n",
    "\n",
    "        return ans\n",
    "    \n",
    "    def init_belief_value_function(self): \n",
    "        v = np.empty(self.index_nums)\n",
    "        f = np.zeros(self.index_nums) \n",
    "        \n",
    "        for index in self.indexes:\n",
    "            f[index] = self.belief_final_state(np.array(index).T)\n",
    "            v[index] = self.goal.value if f[index] else -100.0\n",
    "                \n",
    "        return v, f\n",
    "        \n",
    "    def belief_final_state(self, index):\n",
    "        x_min, y_min, _ = self.pose_min + self.widths*index[0:3] \n",
    "        x_max, y_max, _ = self.pose_min + self.widths*(index[0:3] + 1) \n",
    "        \n",
    "        corners = [[x_min, y_min, _], [x_min, y_max, _], [x_max, y_min, _], [x_max, y_max, _] ] \n",
    "        return all([self.goal.inside(np.array(c).T) for c in corners ]) and index[3] == 0\n",
    "    \n",
    "    def action_value(self, action, index, out_penalty=True):\n",
    "        value = 0.0\n",
    "        for delta, prob in self.state_transition_probs[(action, index[2])]: \n",
    "            after, out_reward = self.out_correction(np.array(index[0:3]).T + delta) #indexを4次元から3次元に\n",
    "            after = tuple([*after, 0])                                                                     #エントロピーのインデックスを加える（とりあえず0で）\n",
    "            reward = - self.time_interval * self.depths[(after[0], after[1])] * self.puddle_coef - self.time_interval + out_reward*out_penalty\n",
    "            value += (self.value_function[after] + reward) * prob\n",
    "\n",
    "        return value"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 3,
   "metadata": {},
   "outputs": [],
   "source": [
    "puddles = [Puddle((-2, 0), (0, 2), 0.1), Puddle((-0.5, -2), (2.5, 1), 0.1)]\n",
    "dp = BeliefDynamicProgramming(np.array([0.2, 0.2, math.pi/18]).T, Goal(-3,-3), puddles, 0.1, 10)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 4,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/plain": [
       "{(0, (0.0, -2.0)): {0: 0.88, 1: 0.12},\n",
       " (1, (0.0, -2.0)): {1: 0.93, 2: 0.07},\n",
       " (2, (0.0, -2.0)): {2: 0.98, 3: 0.02},\n",
       " (3, (0.0, -2.0)): {3: 0.99, 4: 0.01},\n",
       " (4, (0.0, -2.0)): {4: 1.0},\n",
       " (0, (1.0, 0.0)): {0: 0.9, 1: 0.1},\n",
       " (1, (1.0, 0.0)): {1: 0.95, 2: 0.05},\n",
       " (2, (1.0, 0.0)): {2: 0.99, 3: 0.01},\n",
       " (3, (1.0, 0.0)): {3: 0.99, 4: 0.01},\n",
       " (4, (1.0, 0.0)): {4: 1.0},\n",
       " (0, (0.0, 2.0)): {0: 0.88, 1: 0.12},\n",
       " (1, (0.0, 2.0)): {1: 0.93, 2: 0.07},\n",
       " (2, (0.0, 2.0)): {2: 0.98, 3: 0.02},\n",
       " (3, (0.0, 2.0)): {3: 0.99, 4: 0.01},\n",
       " (4, (0.0, 2.0)): {4: 1.0}}"
      ]
     },
     "execution_count": 4,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "dp.motion_sigma_transition_probs ###amdp3result"
   ]
  }
 ],
 "metadata": {
  "kernelspec": {
   "display_name": "Python 3",
   "language": "python",
   "name": "python3"
  },
  "language_info": {
   "codemirror_mode": {
    "name": "ipython",
    "version": 3
   },
   "file_extension": ".py",
   "mimetype": "text/x-python",
   "name": "python",
   "nbconvert_exporter": "python",
   "pygments_lexer": "ipython3",
   "version": "3.7.3"
  }
 },
 "nbformat": 4,
 "nbformat_minor": 2
}
